Load required libraries
library("limma")
library("gplots")
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
library("ConsensusClusterPlus")
# Get `magrittr` pipe: bind the `%>%` operator exported by dplyr to a local
# name so it can be used here without a `library(dplyr)` call.
`%>%` <- dplyr::`%>%`
Import files and tables
# Path of the tab-delimited splicing report to cluster.
# NOTE(review): this is an absolute, machine-specific path -- it will only
# resolve on the original author's machine.
file_psi <- "/Users/naqvia/Desktop/AS-DMG/analyses/pan_cancer/results/pan_cancer_splicing.thr10.report_select.remDup.txt"

# Read the report with a header row; the first column of the file is consumed
# as the row names of the resulting data frame.
psi_tab <- read.table(
  file = file_psi,
  header = TRUE,
  sep = "\t",
  row.names = 1
)
Consensus clustering
# Build the numeric matrix `d` that is passed to ConsensusClusterPlus.
# NOTE(review): `psi_tab` is read with `row.names = 1`, so this assumes the
# first remaining column is `Splice_ID` and every later column holds numeric
# per-sample values -- confirm against the input file.
rnames <- psi_tab[, 1]
row.names(psi_tab) <- psi_tab$Splice_ID
# Drop the first (identifier) column; `drop = FALSE` keeps a data frame even
# when only one value column remains.
mat_hm <- data.matrix(psi_tab[, -1, drop = FALSE])
d <- mat_hm
# d[1:5, 1:5]

## Reduce the dataset to the top 5% most variable rows (splice events),
## measured by median absolute deviation (MAD).
top_fraction <- 0.05
# `na.rm = TRUE` so that a row with a few missing values still gets a MAD;
# without it such rows get an NA MAD.
mads <- apply(d, 1, mad, na.rm = TRUE)
# Derive the row count from the data instead of hard-coding it
# (floor(0.05 * 108352) == 5417, the value used previously).
n_top <- floor(top_fraction * nrow(d))
# Descending MAD order. `na.last = FALSE` before `rev()` pushes rows whose MAD
# is still NA to the end, so they are never picked ahead of real values, while
# keeping the same tie order as `rev(order(mads))`.
d <- d[rev(order(mads, na.last = FALSE))[seq_len(n_top)], , drop = FALSE]

## Median-center each row before clustering (subtract the row median), as is
## appropriate for correlation-based distances.
row_medians <- apply(d, 1, median, na.rm = TRUE)
d <- sweep(d, 1, row_medians)

## Replace every non-finite entry (NA, NaN, Inf, -Inf) with 0. Because the
## rows are already median-centered, 0 corresponds to the row median.
d[!is.finite(d)] <- 0
## hc/spearman
# Consensus clustering of `d` with hierarchical clustering ("hc") and Spearman
# distance, up to maxK = 10 clusters. The return value replaces `results`.
results <- ConsensusClusterPlus(
  d,
  clusterAlg = "hc",
  distance = "spearman",
  maxK = 10,
  reps = 100,
  pItem = 0.8,
  innerLinkage = "average",
  finalLinkage = "average",
  seed = 123,
  title = "clustering"
)
## end fraction
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
## pam/spearman
# Consensus clustering of `d` with partitioning around medoids ("pam") and
# Spearman distance, up to maxK = 10 clusters. The return value replaces
# `results`.
results <- ConsensusClusterPlus(
  d,
  clusterAlg = "pam",
  distance = "spearman",
  maxK = 10,
  reps = 100,
  pItem = 0.8,
  innerLinkage = "average",
  finalLinkage = "average",
  seed = 123,
  title = "clustering"
)
## end fraction
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
## k-means/spearman
# NOTE: ConsensusClusterPlus reports that "km" only supports a euclidean
# distance when given a data matrix and changes the distance to euclidean, so
# the "spearman" requested here is not what is actually used for this run.
results <- ConsensusClusterPlus(
  d,
  clusterAlg = "km",
  distance = "spearman",
  maxK = 10,
  reps = 100,
  pItem = 0.8,
  innerLinkage = "average",
  finalLinkage = "average",
  seed = 123,
  title = "clustering"
)
## Note: The km (kmeans) option only supports a euclidean distance metric when supplying a data matrix. If you want to cluster a distance matrix, use a different algorithm such as 'hc' or 'pam'. Changing distance to euclidean
## end fraction
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
## hc/pearson
# Consensus clustering of `d` with hierarchical clustering ("hc") and Pearson
# distance, up to maxK = 10 clusters. The return value replaces `results`.
results <- ConsensusClusterPlus(
  d,
  clusterAlg = "hc",
  distance = "pearson",
  maxK = 10,
  reps = 100,
  pItem = 0.8,
  innerLinkage = "average",
  finalLinkage = "average",
  seed = 123,
  title = "clustering"
)
## end fraction
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
## pam/pearson
# Consensus clustering of `d` with partitioning around medoids ("pam") and
# Pearson distance, up to maxK = 10 clusters. The return value replaces
# `results`.
results <- ConsensusClusterPlus(
  d,
  clusterAlg = "pam",
  distance = "pearson",
  maxK = 10,
  reps = 100,
  pItem = 0.8,
  innerLinkage = "average",
  finalLinkage = "average",
  seed = 123,
  title = "clustering"
)
## end fraction
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
## k-means/pearson
# NOTE: ConsensusClusterPlus reports that "km" only supports a euclidean
# distance when given a data matrix and changes the distance to euclidean, so
# the "pearson" requested here is not what is actually used for this run.
results <- ConsensusClusterPlus(
  d,
  clusterAlg = "km",
  distance = "pearson",
  maxK = 10,
  reps = 100,
  pItem = 0.8,
  innerLinkage = "average",
  finalLinkage = "average",
  seed = 123,
  title = "clustering"
)
## Note: The km (kmeans) option only supports a euclidean distance metric when supplying a data matrix. If you want to cluster a distance matrix, use a different algorithm such as 'hc' or 'pam'. Changing distance to euclidean
## end fraction
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
## hc/euclidean
# Consensus clustering of `d` with hierarchical clustering ("hc") and
# euclidean distance, up to maxK = 10 clusters. The return value replaces
# `results`.
results <- ConsensusClusterPlus(
  d,
  clusterAlg = "hc",
  distance = "euclidean",
  maxK = 10,
  reps = 100,
  pItem = 0.8,
  innerLinkage = "average",
  finalLinkage = "average",
  seed = 123,
  title = "clustering"
)
## end fraction
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
## pam/euclidean
# Consensus clustering of `d` with partitioning around medoids ("pam") and
# euclidean distance, up to maxK = 10 clusters. The return value replaces
# `results`.
results <- ConsensusClusterPlus(
  d,
  clusterAlg = "pam",
  distance = "euclidean",
  maxK = 10,
  reps = 100,
  pItem = 0.8,
  innerLinkage = "average",
  finalLinkage = "average",
  seed = 123,
  title = "clustering"
)
## end fraction
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
## k-means/euclidean
# Consensus clustering of `d` with k-means ("km") and euclidean distance, up
# to maxK = 10 clusters. The return value replaces `results`.
results <- ConsensusClusterPlus(
  d,
  clusterAlg = "km",
  distance = "euclidean",
  maxK = 10,
  reps = 100,
  pItem = 0.8,
  innerLinkage = "average",
  finalLinkage = "average",
  seed = 123,
  title = "clustering"
)
## end fraction
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered